/************************************************************************
* utf8.c
* voxelands - 3d voxel world sandbox game
* Copyright (C) Lisa 'darkrose' Milne 2016 <lisa@ltmnet.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>
*
* Modified from original Public Domain sources by Jeff Bezanson
************************************************************************/

#include "common.h"

#include <string.h>

/*
 * correction values for the decoders in this file
 *
 * the decoders add up the raw bytes of a sequence, shifting the sum left
 * by 6 bits before each byte, so the marker bits of the lead byte and of
 * every continuation byte end up in the total. utf8_offsets[n-1] is the
 * combined value of those marker bits for an n byte sequence, subtracting
 * it leaves just the payload bits
 */
static const uint32_t utf8_offsets[6] = {
	0x00000000UL,	/* 1 byte:  nothing to remove */
	0x00003080UL,	/* 2 bytes: (0xC0<<6) + 0x80 */
	0x000E2080UL,	/* 3 bytes: (0xE0<<12) + (0x80<<6) + 0x80 */
	0x03C82080UL,	/* 4 bytes: (0xF0<<18) + (0x80<<12) + (0x80<<6) + 0x80 */
	0xFA082080UL,	/* 5 bytes: (0xF8<<24) + four continuation markers */
	0x82082080UL	/* 6 bytes: five continuation markers, the 0xFC lead marker shifts out of 32 bits */
};

/*
 * number of bytes that follow a given lead byte, indexed by the value of
 * that byte (0-255), 32 entries per row
 * bytes 0x00-0xBF (ascii and continuation bytes) have no trailing bytes
 */
static const char utf8_trailing_bytes[256] = {
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	/* 0xC0-0xDF */
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	/* 0xE0-0xEF, then 0xF0-0xF7, 0xF8-0xFB, 0xFC-0xFF */
	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};

/*
 * is c the start of a utf8 sequence?
 * true for every byte that is not a continuation byte (10xxxxxx), so it
 * is also true for plain ascii and for the nul terminator
 * c appears once in the expansion, and callers in this file rely on that
 * by passing expressions with side effects such as str[++i]
 */
#define isutf8(c) (((c)&0xC0)!=0x80)

/*
 * returns length of next utf-8 sequence
 *
 * the length is taken from the first byte of str alone (1 plus its
 * utf8_trailing_bytes entry), the bytes after it are not inspected
 */
int utf8_seqlen(char* str)
{
	/* go through unsigned char so bytes >= 0x80 can not index negatively */
	const unsigned char lead = (unsigned char)*str;

	return 1 + utf8_trailing_bytes[lead];
}

/*
 * reads the next utf-8 sequence out of a string, updating an index
 *
 * str: nul terminated utf-8 string
 * i:   byte offset of the sequence to read, on return it has been moved
 *      past every byte that was consumed
 *
 * returns the decoded value, or 0 when *i is at the terminator
 *
 * a sequence is the byte at *i plus the continuation bytes (10xxxxxx)
 * that follow it, nothing is validated, so malformed input decodes to a
 * meaningless value rather than an error
 */
uint32_t utf8_nextchar(char* str, int *i)
{
	/* longest sequence utf8_offsets has a correction value for */
	const int max_sz = (int)(sizeof(utf8_offsets)/sizeof(utf8_offsets[0]));
	uint32_t ch = 0;
	int sz = 0;

	/* at the terminator: step over it as before, but do not go on to
	 * look at the byte behind it, that is outside the string */
	if (!str[*i]) {
		(*i)++;
		return 0;
	}

	do {
		ch <<= 6;
		ch += (unsigned char)str[(*i)++];
		sz++;
	/* cap an overlong run of continuation bytes so that sz-1 always
	 * stays inside utf8_offsets */
	} while (sz < max_sz && str[*i] && !isutf8(str[*i]));

	ch -= utf8_offsets[sz-1];

	return ch;
}

/*
 * get a single utf32 char from a utf8 string
 *
 * src:  utf-8 bytes, only read when size is non-zero
 * size: number of bytes available at src, or negative to use strlen(src)
 *       at most 5 bytes are looked at
 *
 * returns the first character decoded from src, 0 if there is none
 */
uint32_t utf8_toutf32(char* src, int size)
{
	/* zero filled, so the decoder never sees an uninitialised byte no
	 * matter how little gets copied in */
	char buff[6] = {0};
	int i = 0;

	if (size < 0) {
		/* clamp while still a size_t, a huge string must not wrap
		 * when narrowed to int */
		size_t len = strlen(src);
		size = (len > 5) ? 5 : (int)len;
	}

	if (size > 5)
		size = 5;

	/* also catches the empty string when the size was measured above */
	if (!size)
		return 0;

	/* strncpy rather than memcpy: src may end before size bytes */
	strncpy(buff,src,size);
	buff[size] = 0;

	return utf8_nextchar(buff,&i);
}

/*
 * convert a single utf32 char to a utf8 string
 *
 * dest: buffer to write the sequence to
 * sz:   size of dest in bytes
 * ch:   the character to encode
 *
 * returns the number of bytes written, not counting the terminator
 * returns 0 and leaves dest untouched if the sequence does not fit
 * returns 0 for values of 0x110000 and above, dest is then set to the
 * empty string if sz allows
 *
 * the result is only nul terminated when there is room left after the
 * sequence (sz greater than the return value)
 */
int utf8_fromutf32(char *dest, int sz, uint32_t ch)
{
	int i = 0;
	int len;

	/* how many bytes does ch need? */
	if (ch < 0x80) {
		len = 1;
	}else if (ch < 0x800) {
		len = 2;
	}else if (ch < 0x10000) {
		len = 3;
	}else if (ch < 0x110000) {
		len = 4;
	}else{
		/* out of range, nothing gets encoded */
		len = 0;
	}

	/* compare sizes rather than pointers, so no pointer outside of
	 * dest is ever formed */
	if (sz < len)
		return 0;

	switch (len) {
	case 1:
		dest[i++] = (char)ch;
		break;
	case 2:
		dest[i++] = (char)((ch>>6) | 0xC0);
		dest[i++] = (char)((ch & 0x3F) | 0x80);
		break;
	case 3:
		dest[i++] = (char)((ch>>12) | 0xE0);
		dest[i++] = (char)(((ch>>6) & 0x3F) | 0x80);
		dest[i++] = (char)((ch & 0x3F) | 0x80);
		break;
	case 4:
		dest[i++] = (char)((ch>>18) | 0xF0);
		dest[i++] = (char)(((ch>>12) & 0x3F) | 0x80);
		dest[i++] = (char)(((ch>>6) & 0x3F) | 0x80);
		dest[i++] = (char)((ch & 0x3F) | 0x80);
		break;
	default:
		break;
	}

	if (i < sz)
		dest[i] = 0;

	return i;
}

/*
 * needed because windows - get a single utf32 char from utf16
 *
 * str: utf-16 code units, may be NULL
 *      NOTE(review): assumed to be 0 terminated, str[1] is read whenever
 *      str[0] is a surrogate - confirm against callers
 *
 * returns the code point that starts at str[0]
 * returns 0 for NULL, for an empty string, and for a surrogate that is
 * not part of a valid high/low pair
 */
uint32_t utf16_toutf32(uint16_t *str)
{
	uint32_t s0;
	uint32_t s1;

	if (!str || !str[0])
		return 0;

	s0 = str[0];

	/* everything outside of the surrogate range D800-DFFF is a complete
	 * code point on its own */
	if (s0 < 0xD800 || s0 > 0xDFFF)
		return s0;

	s1 = str[1];

	/* a high surrogate (D800-DBFF) followed by a low one (DC00-DFFF) */
	if ((s0&0xFC00) == 0xD800 && (s1&0xFC00) == 0xDC00) {
		/* 0x35FDC00 is (0xD800<<10) + 0xDC00 - 0x10000, it strips both
		 * surrogate tags and adds the 0x10000 base in one step */
		return (s0 << 10) + s1 - 0x35FDC00;
	}

	return 0;
}

/*
 * char index to byte offset
 *
 * str: nul terminated utf-8 string
 * i:   character index to look up
 *
 * returns the byte offset at which character i starts, or the offset of
 * the terminator if the string has fewer than i characters
 * a character is taken to be one byte plus at most three continuation
 * bytes
 */
int utf8_offset(char* str, int i)
{
	int pos = 0;
	int extra;

	for (; i > 0 && str[pos]; i--) {
		/* the first byte always belongs to the character */
		pos++;
		/* then skip continuation bytes, the third is skipped without
		 * looking at what comes after it */
		for (extra = 0; extra < 3 && !isutf8(str[pos]); extra++)
			pos++;
	}

	return pos;
}

/*
 * byte offset to char index
 *
 * str: nul terminated utf-8 string
 * o:   byte offset into str
 *
 * returns the number of characters that start before byte offset o,
 * counting stops early at the terminator
 * a character is taken to be one byte plus at most three continuation
 * bytes
 */
int utf8_charindex(char* str, int o)
{
	int chars = 0;
	int pos = 0;
	int extra;

	while (pos < o && str[pos]) {
		/* step over one whole character ... */
		pos++;
		for (extra = 0; extra < 3 && !isutf8(str[pos]); extra++)
			pos++;
		/* ... and count it */
		chars++;
	}

	return chars;
}

/*
 * number of characters
 *
 * str: nul terminated utf-8 string
 *
 * returns the number of sequences utf8_nextchar() finds before the
 * terminator, counting also stops at a sequence that decodes to 0
 */
int utf8_strlen(char* str)
{
	int count = 0;
	int i = 0;

	/* test for the terminator here, so that utf8_nextchar() is never
	 * asked to decode it and i never moves past the end of the string */
	while (str[i] && utf8_nextchar(str, &i) != 0) {
		count++;
	}

	return count;
}

/*
 * increment i by one character index
 *
 * str: utf-8 string
 * i:   byte offset of a character, moved forward to the next byte that
 *      is not a continuation byte, by at most 4 bytes
 *
 * the byte at *i is stepped over without being looked at, so *i must not
 * already be at the terminator
 */
void utf8_inc(char* str, int *i)
{
	int pos = *i + 1;
	int extra;

	for (extra = 0; extra < 3 && !isutf8(str[pos]); extra++)
		pos++;

	*i = pos;
}

/*
 * decrement i by one character index
 *
 * str: utf-8 string
 * i:   byte offset, moved back to the closest earlier byte that is not a
 *      continuation byte, by at most 4 bytes
 *
 * str[*i - 1] is always read and there is no check against the start of
 * the string, so *i has to be greater than 0 on entry
 */
void utf8_dec(char* str, int *i)
{
	int pos = *i - 1;
	int extra;

	for (extra = 0; extra < 3 && !isutf8(str[pos]); extra++)
		pos--;

	*i = pos;
}

/*
 * strchr() for utf8
 *
 * str:   nul terminated utf-8 string to search
 * ch:    the character to look for
 * charn: optional (may be NULL), receives the number of characters that
 *        come before the match, or the total number of characters in
 *        str when there is no match
 *
 * returns a pointer to the first byte of the first matching character,
 * NULL if there is none, the terminator itself is never matched
 */
char* utf8_strchr(char* str, uint32_t ch, int *charn)
{
	int start;
	int next = 0;

	if (charn)
		*charn = 0;

	/* start is where the current character begins, next is moved past
	 * it by utf8_nextchar() */
	for (start = 0; str[start]; start = next) {
		if (utf8_nextchar(str, &next) == ch)
			return str + start;
		if (charn)
			++*charn;
	}

	return NULL;
}

/*
 * memchr() for utf8
 *
 * str:   utf-8 data, does not have to be nul terminated
 * ch:    the character to look for
 * sz:    number of bytes of str to search
 * charn: optional (may be NULL), receives the number of characters that
 *        come before the match, or the number of characters in the
 *        searched bytes when there is no match
 *
 * returns a pointer to the first byte of the first matching character,
 * NULL if there is none
 *
 * unlike utf8_strchr() a 0 byte does not end the search, it is decoded
 * like any other character. a sequence that runs into the end of the
 * sz bytes is decoded from the bytes that are there
 */
char* utf8_memchr(char* str, uint32_t ch, size_t sz, int *charn)
{
	/* longest sequence utf8_offsets has a correction value for */
	const int max_csz = (int)(sizeof(utf8_offsets)/sizeof(utf8_offsets[0]));
	/* size_t to match sz, an int index would be compared as unsigned
	 * anyway and could not reach the end of a very large buffer */
	size_t i = 0;
	size_t lasti = 0;
	uint32_t c;
	int csz;

	if (charn)
		*charn = 0;

	while (i < sz) {
		c = 0;
		csz = 0;
		do {
			c <<= 6;
			c += (unsigned char)str[i++];
			csz++;
		/* cap an overlong run of continuation bytes so that csz-1
		 * always stays inside utf8_offsets */
		} while (csz < max_csz && i < sz && !isutf8(str[i]));

		c -= utf8_offsets[csz-1];

		if (c == ch)
			return &str[lasti];
		lasti = i;
		if (charn)
			(*charn)++;
	}

	return NULL;
}
